/*******************************************************************************

  Paying Outsourced Labor: Direct Evidence from Linked Temp Agency-Worker-Client Data

  By Andres Drenik, Simon Jäger, Pascuel Plotkin and Benjamin Schoefer
	January 7th, 2021

	DESCRIPTION: Cleans the Registro Dataset to prepare for Merge with SIPA

*******************************************************************************/




/********************************************************************************
***** Preliminaries
********************************************************************************/
* Session setup: close any log left open by a previous run (ignoring the error
* if none is open), disable output paging, and open a log for this script whose
* name carries today's date.
capture log close
set more off

* c(current_date) is Stata's current-date string; it is appended verbatim to
* the log file name.
local curr_date = c(current_date)
log using "${logs}/01_Registro_Merge_Prep`curr_date'", replace

/****************************************************************************************
* Looping over the 2008 - 2017 Registro Datasets
****************************************************************************************/

* For each year 2008-2017: load the appended Registro file, build one row per
* worker-bimester-employer cell carrying up to five distinct user firms in wide
* format, expand each bimester into its two calendar months, and save the
* year-specific file used by the later merge.
forvalues y = 2008/2017 {

	use "${input}/registro_eventual_`y'_append", clear

	* Bimester index: first character of Bimestre, converted to numeric
	gen bimester_number = substr(Bimestre, 1, 1)
	destring bimester_number, replace

	* Rename identifiers; strip dashes from worker and user-firm ids and make them numeric
	* NOTE(review): cuit_empl gets no dash-stripping/destring here - presumably it is
	* already numeric in the input; confirm.
	rename CUIT cuit_empl
	rename CUIL cuil_trab
	rename UsuarioCUIT cuit_user_firm
	foreach v in cuil_trab cuit_user_firm {
		replace `v' = subinstr(`v', "-", "", .)
		destring `v', replace
	}

	* Generate year variable
	gen year = `y'

	* Keep only relevant variables.
	* The full name bimester_number is spelled out everywhere below so the script
	* does not depend on variable abbreviation (it would break under
	* -set varabbrev off- or if another bimester* variable were ever added).
	keep cuil_trab cuit_empl bimester_number year cuit_user_firm

	* Create id for worker-bimester-employer cell; rows with a missing id are dropped
	gegen id = group(cuil_trab bimester_number cuit_empl)
	drop if id == .

	* Missing user firms are recoded to the sentinel value 1 so that they are not
	* confused with windows that only have one (known) user firm.
	replace cuit_user_firm = 1 if cuit_user_firm == .

	* Flag windows with more than one distinct user firm:
	* mark the first row of every (id, user firm) pair and count the marks per id.
	bys id cuit_user_firm: gen byte first_of_pair = (_n == 1)
	gegen n_user_firms = sum(first_of_pair), by(id)
	gen multiple_user_firm = (n_user_firms > 1 & n_user_firms != .)

	* Keep a single row per (id, user firm). After the -keep- above, repeated rows of
	* the same pair are exact duplicates; leaving them in would let one user firm fill
	* several wide slots and could push distinct user firms beyond the 5-slot cap.
	keep if first_of_pair
	drop first_of_pair n_user_firms

	* Number the user firms within each window (ascending id of the user firm);
	* this index becomes the column suffix in the wide layout.
	bys id (cuit_user_firm): gen user_firm_n = _n

	* Impose max 5 user firms per bimester-worker-tempfirm
	drop if user_firm_n > 5

	* Reshape to wide: one row per id, columns cuit_user_firm1 ... cuit_user_firm5
	reshape wide cuit_user_firm, i(id) j(user_firm_n)

	* -reshape- only creates columns for slot numbers that actually occur; make sure
	* all five exist so every year file has the same set of variables.
	forvalues k = 1/5 {
		capture confirm variable cuit_user_firm`k', exact
		if _rc gen double cuit_user_firm`k' = .
	}

	* Duplicate every row: new == 0 is the original, new == 1 the copy
	expand 2, generate(new)

	* Bimester b covers months 2b-1 (original row) and 2b (copy).
	* Bimester values outside 1-6 leave month (and hence date) missing.
	gen month = 2 * bimester_number - 1 + new if inrange(bimester_number, 1, 6)

	* Monthly date
	gen date = ym(year, month)
	format date %tm

	* NOTE(review): the global is spelled "intermediate_data_clea" - confirm this
	* matches the name defined in the master do-file (possibly a truncated "..._clean").
	save "${intermediate_data_clea}/Registro_eventual_for_merge_`y'.dta", replace

}

log close
